// Start from a clean Stata session, then choose platform-specific path pieces.
clear all

// Only the "linux" case is handled here; for any other value of ${system}
// the globals ${code} and ${s} are not set by this block.
global system "linux"
if ("$system" == "linux") {
	// prefix used when locating the do-files run below
	global code "/"
	// path separator
	global s "/"
}

// Silently execute the path set-up script
// (presumably where ${LPS} is defined -- confirm in _set-path.do).
run "${code}${s}_set-path.do"


// Load the cleaned loan-level CRISM panel (see 6a-crism-prep.do).
use "${LPS}/crism-loanlevel-cleaned.dta", clear

// In the termination month the balance is recorded as 0; set it to missing
// so that a positive number can be filled in later.
replace prin_bal_amount = . if termination_date == data_as_of

// Helper script that creates the refi indicators and auto_orig.
do "${code}/tba_elig/_crism-create-vars.do"

// Additional refi variables, built within each loan (ind_id mcdash_id) for
// every refi flag x in the list:
//   x_dt    = maximum over the loan's rows of termination_date * x
//   after_x = 1 on rows dated at or after x_dt, 0 otherwise
// bysort is used instead of a bare by: no sort has been issued yet in this
// file, so a bare by would stop with a "not sorted" error unless the helper
// do-file happened to leave the data in this order.
foreach x of varlist anyrefi plainrefi5pct2 /*  prepaid */ {
	bysort ind_id mcdash_id (data_as_of): egen `x'_dt = max(termination_date * `x')
	// x_dt == 0 and x_dt == . both give after_x = 0
	// (the second factor is 0 in the first case, the comparison is false in the second)
	gen after_`x' = (data_as_of >= `x'_dt) * (`x'_dt~=0)
}


*** Carry time-varying loan fields forward over missing values
// Within each loan, in date order, a missing value on a row with
// after_anyrefi == 1 takes the value of the previous row. replace works
// through the rows in order, so a value propagates across a run of
// consecutive missing rows.
sort ind_id mcdash_id data_as_of
foreach v of varlist current_int_rate prin_bal_amount delinq_hist_12mon io_flag_curr pp_pen_flag {
	by ind_id mcdash_id: replace `v' = `v'[_n-1] if after_anyrefi == 1 & missing(`v')
}

*** Drop rows dated six or more periods after termination_date
// (data_as_of is a monthly date elsewhere in this file, so the offset is in months)
drop if termination_date + 6 <= data_as_of


*** Sample selection around cll (presumably the conforming loan limit -- confirm)
// cll depends on the calendar year of the observation month:
// 424100 in 2017, 453100 in 2018, 417000 in every other year.
// NOTE(review): years after 2018 also get 417000 -- confirm the data end in 2018.
local obs_year year(dofm(data_as_of))
gen cll = cond(`obs_year' == 2017, 424100, cond(`obs_year' == 2018, 453100, 417000))

// Bins of width 5000 in prin_bal_amount relative to cll, shifted by 10:
// bins 1-10 hold balances at or below cll, bins 11-20 balances above it.
// Rows outside bins 1-20 (including a missing balance) are dropped.
gen bin = ceil((prin_bal_amount - cll)/5000) + 10
keep if inrange(bin, 1, 20)

// auto0: auto_balance on the first remaining row of each loan (date order),
// copied to every row of that loan
bysort ind_id mcdash_id (data_as_of): gen auto0 = auto_balance[1]

// after_prepaid: 1 on rows dated at or after termination_date, 0 otherwise
gen after_prepaid = termination_date <= data_as_of


// Keep only rows where the main loan's initial balance is large enough:
// orig_amount on the loan's first remaining row must be at least cll + 25000
// (cll is evaluated on each row).
bysort ind_id mcdash_id (data_as_of): gen orig_amount0 = orig_amount[1]
drop if !(orig_amount0 >= cll + 25000)
drop orig_amount0

// lowbal: balance at or below cll; diff: balance minus cll, in thousands
gen lowbal = (prin_bal_amount <= cll)
gen diff = (prin_bal_amount - cll)/1000

// Refi timing
// EverPlainRefi: 1 when plainrefi5pct2_dt is neither 0 nor missing
gen EverPlainRefi = !(plainrefi5pct2_dt == 0 | plainrefi5pct2_dt == .)

// refitime: row date relative to plainrefi5pct2_dt, shifted so that the
// refi month itself is 6; values outside 0..11 are set to missing
gen refitime = data_as_of - plainrefi5pct2_dt + 6
replace refitime = . if !inrange(refitime, 0, 11)

// NOTE(review): BalRefi sums plainrefi5pct2_dt * prin_bal_amount over the
// loan, i.e. a date value times a balance. Possibly the 0/1 flag
// plainrefi5pct2 was intended instead of the date -- confirm. BalRefi is not
// in the keep list further down this file.
bysort ind_id mcdash_id (data_as_of): egen BalRefi = total(plainrefi5pct2_dt* prin_bal_amount)

// Loan-level covariates

// Loan age: observation date minus origination date
gen loanage = data_as_of - orig_date

// Purpose: indicator for code "1" (presumably purchase -- confirm against
// the code book) and a numeric id for each distinct purpose code
gen purchase = (purpose_type_id_mcdash == "1")
egen purpose_id = group(purpose_type_id)

// Occupancy and prepayment-penalty indicators
gen owneroccupied = (occupancy_type_id == "1")
gen pp_pen = (pp_pen_flag == "Y")
// penalty flag set and loan age not beyond pp_term_num_mon
// NOTE(review): a missing pp_term_num_mon compares as >= loanage -- confirm intended
gen pp_pen_period = pp_pen == 1 & loanage <= pp_term_num_mon

// Investor categories: 1 = Ginnie ("1"), 2 = GSEs ("2","3"), 3 = PLS ("4"),
// 4 = portfolio ("7"); every other code maps to 0 and is dropped
gen investor_id = cond(investor_type_id == "1", 1, ///
	cond(inlist(investor_type_id, "2", "3"), 2, ///
	cond(investor_type_id == "4", 3, ///
	cond(investor_type_id == "7", 4, 0))))
keep if investor_id != 0

// Documentation indicator and remaining-balance factor
gen FullDoc = (document_type_id == "1")
gen fctr = prin_bal_amount/ orig_amount

// Delinquency history indicators: 1 when the character 3, 6 or 9 appears
// anywhere in delinq_hist_12mon (regexm already returns 0/1)
foreach n in 3 6 9 {
	gen past_delinq`n' = regexm(delinq_hist_12mon, "`n'")
}
// 1 when delinq_hist_12mon contains an F or an R
gen past_forcl = regexm(delinq_hist_12mon, "[FR]")

// Loan-level flag: any row with past_delinq6 or past_forcl set.
// bysort is used instead of a bare by because the egen group() call earlier
// in this file may leave the data in a different sort order, in which case
// a bare by would stop with a "not sorted" error. The max is order-independent,
// and the (data_as_of) key leaves rows in date order within loan.
bysort ind_id mcdash_id (data_as_of): egen EverDelinq = max(past_delinq6 | past_forcl)


// Interest-only indicators from io_flag and from io_flag_curr
gen io_loan = io_flag == "Y"
gen io_period = io_flag_curr == "Y"

*** Updated LTV
// Appraisal value scaled by hpi / hpi_orig, then balance over that value
gen updated_price = appraisal_amount* hpi/hpi_orig
gen updated_ltv = prin_bal_amount/ updated_price

// Bins of width 0.1 in updated_ltv, clamped to the range 4..13.
// Stata's max() and min() ignore missing arguments, so an unguarded
// max(ultv_bin, 4) would turn a missing LTV into bin 4. The clamp is
// therefore applied only to non-missing values, and a missing LTV keeps a
// missing bin.
gen ultv_bin = ceil(updated_ltv* 10)
replace ultv_bin = min(max(ultv_bin, 4), 13) if !missing(ultv_bin)

*** Risk score bins
// Bin k (k = 2..10) covers credit_score_risk3 in (600 + 25(k-1), 600 + 25k].
// Every other value stays in bin 1: scores of 625 or less, scores above 850,
// and missing scores.
// NOTE(review): confirm that missing scores are meant to fall in bin 1.
gen risk_bin = 1
forvalues k = 2/10 {
	local lower = 600 + 25*(`k' - 1)
	local upper = 600 + 25*`k'
	replace risk_bin = `k' if credit_score_risk3 > `lower' & credit_score_risk3 <= `upper'
}

*** Origination-quarter fixed effect
// orig_date is treated as a monthly date and converted to a quarterly date
gen orig_qtr = qofd(dofm(orig_date))
format %tq orig_qtr

*** Convert zip_code to numeric in place
destring zip_code, replace

// Covariate lists, stored as globals
global controls "loanage purpose_id fctr orig_amount risk_bin FullDoc ultv_bin current_int_rate investor_id fm_balance fm_number loan_to_value owneroccupied pp_pen pp_pen_period past_delinq* past_forcl io_loan io_period"
global auto_controls "lag_auto_balance* auto_orig* auto0 "

// Trim the dataset to identifiers, geography, refi variables, sample
// variables and the two covariate lists; keep leaves column order unchanged
local id_vars "ind_id mcdash_id data_as_of termination_date orig_date orig_qtr"
local geo_vars "prop_zip_code zip_code"
local refi_vars "*plainrefi* refitime EverPlainRefi anyrefi cashrefi* prepaid"
local samp_vars "bin diff lowbal primary_fg updated_ltv owneroccupied fm_balance_change*"
keep `id_vars' `geo_vars' `refi_vars' `samp_vars' ${controls} ${auto_controls}

// Numeric id for each combination of property zip code and observation date
egen zip_ym = group(prop_zip_code data_as_of)

*** Avg Market Mortgage Rate
// Attach frm30rate by observation date; rows without a match in the rate
// file are kept, with frm30rate missing
merge m:1 data_as_of using "${LPS}/freddie_monthly_30yr_fixed_rate.dta", keep(matched master) nogen

// Rate gap in steps of 0.25, clamped to -2..11.
// Stata's max() and min() ignore missing arguments, so an unguarded clamp
// would turn a missing gap (missing current_int_rate or frm30rate) into -2,
// the lowest bin. The clamp is applied only to non-missing values.
gen diff_rate = ceil((current_int_rate - frm30rate)/ .25)
replace diff_rate = min(max(diff_rate, -2), 11) if !missing(diff_rate)

// Shift so that the smallest observed bin is 0. r(min) is read directly
// rather than through macro expansion, so the command stays valid even when
// there are no non-missing observations (r(min) then evaluates to missing).
qui su diff_rate
replace diff_rate = diff_rate - r(min)


*** Max Num Mortgage
// NOTE(review): no code follows this header in this file.
// Write the loan-level regression dataset, overwriting any existing copy.
save "${LPS}/reg-data-loanlevel.dta", replace
